package com.ld.shieldsb.common.core.util.sensitiveWord.impl;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.ld.shieldsb.common.core.util.SensitiveWordUtil;
import com.ld.shieldsb.common.core.util.StringUtils;
import com.ld.shieldsb.common.core.util.sensitiveWord.SensitiveWordHandler;
import com.ld.shieldsb.common.core.util.sensitiveWord.SensitiveWordResult;

import lombok.Data;

/**
 * {@link SensitiveWordHandler} for plain text.
 *
 * <p>{@link #deal(String, String)} delegates detection and masking to {@link SensitiveWordUtil}.
 * The class also contains a punctuation-tolerant scanner ({@code filterPunctuation} +
 * {@code filter}) that strips punctuation/separators before matching so that input such as
 * {@code "a_b_c"} can still match the dictionary entry {@code "abc"}; in this file that scanner
 * is only exercised from {@link #main(String[])}.
 */
public class TextSensitiveWordHandler implements SensitiveWordHandler {

    /** Mask character used when the caller supplies a {@code null} or empty replacement string. */
    private static final char DEFAULT_MASK_CHAR = '*';

    /**
     * Matches a single character that should be ignored while scanning for sensitive words.
     *
     * <p>Unicode general categories used by the expression:
     * <ul>
     * <li>{@code P} - punctuation</li>
     * <li>{@code Z} - separators (spaces, line breaks, ...)</li>
     * <li>{@code S} - symbols (math, currency, ...)</li>
     * <li>{@code M} - marks (normally do not appear on their own)</li>
     * <li>{@code C} - other (control, format, surrogate, unassigned, ...)</li>
     * </ul>
     * Letters ({@code L}) and numbers ({@code N}) are intentionally not listed and are kept.
     *
     * <p>Compiled once: {@link Pattern} instances are immutable and safe to share, whereas
     * compiling per character is needlessly expensive.
     */
    private static final Pattern PUNCTUATION_PATTERN = Pattern.compile("[\\pP\\pZ\\pS\\pM\\pC]",
            Pattern.CASE_INSENSITIVE);

    /**
     * Checks {@code value} for sensitive words using {@link SensitiveWordUtil}.
     *
     * <p>The original content and the "contains" flag are always set on the result. Only when
     * {@link SensitiveWordUtil#contains} reports a hit are the matched-word string and the
     * filtered content populated. This method does not strip punctuation before matching.
     *
     * @param value       text to inspect
     * @param replaceChar replacement passed through to {@code SensitiveWordUtil.replaceSensitiveWord}
     * @return the populated result object
     */
    public SensitiveWordResult deal(String value, String replaceChar) {
        SensitiveWordResult result = new SensitiveWordResult();
        boolean contains = SensitiveWordUtil.contains(value);
        if (contains) {
            result.setSensitiveWordStr(SensitiveWordUtil.getSensitiveWordStr(value));
            result.setFilteredContent(SensitiveWordUtil.replaceSensitiveWord(value, replaceChar));
        }
        result.setOriginalContent(value);
        result.setContains(contains);
        return result;
    }

    /**
     * Manual smoke test for the punctuation-tolerant scanner; prints the result to stderr.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Set<String> sensitiveWordSet = new HashSet<>();
        sensitiveWordSet.add("一党专制");
        // Initialise the sensitive-word dictionary.
        SensitiveWordUtil.init(sensitiveWordSet);
        String str = "一_党_专_制_的国家";
        SensitiveWordResult result = TextSensitiveWordHandler.filter(filterPunctuation(str), "*");
        System.err.println(result);
    }

    /**
     * Scans the punctuation-stripped text for dictionary words and masks the corresponding
     * characters in the original text.
     *
     * <p>For every start position the dictionary map is walked character by character and the
     * longest entry flagged with {@code "isEnd" = "1"} is taken. A hit is only accepted when it
     * spans more than one filtered character ({@code end > start}), so single-character entries
     * are never reported by this method. Punctuation that sits between matched characters in the
     * original text is left untouched; only the matched characters themselves are masked.
     *
     * @param pohResult   original text plus its punctuation-stripped form and offset mapping
     * @param replacement mask; only its first character is used. A {@code null} or empty value
     *                    falls back to {@code '*'} instead of throwing.
     * @return result holding the original text, the masked text, the comma-separated matched
     *         words and whether anything matched
     */
    @SuppressWarnings("rawtypes")
    private static SensitiveWordResult filter(PunctuationOrHtmlFilteredResult pohResult, String replacement) {
        StringBuilder sentence = pohResult.getFilteredString();
        List<Integer> charOffsets = pohResult.getCharOffsets();
        StringBuilder maskedText = new StringBuilder(pohResult.getOriginalString());
        StringBuilder badWords = new StringBuilder();

        // Resolve the mask once instead of on every replaced character.
        char maskChar = (replacement == null || replacement.isEmpty()) ? DEFAULT_MASK_CHAR : replacement.charAt(0);

        // Root of the dictionary map; nothing can match if the dictionary is not available.
        Map rootMap = SensitiveWordUtil.getSensitiveWordMap();
        int length = sentence.length();

        for (int i = 0; rootMap != null && i < length; i++) {
            int start = i;
            int end = i;
            Map nowMap = rootMap;
            for (int j = i; j < length; j++) {
                // Descend to the child node for the current character.
                nowMap = (Map) nowMap.get(sentence.charAt(j));
                if (nowMap == null) {
                    // No dictionary entry continues with this character.
                    break;
                }
                if ("1".equals(nowMap.get("isEnd"))) {
                    // A complete word ends here; keep going in case a longer one exists.
                    end = j;
                }
            }

            if (end > start) {
                for (int k = start; k <= end; k++) {
                    // Map the filtered index back to the original text and mask that character.
                    maskedText.setCharAt(charOffsets.get(k), maskChar);
                }
                if (badWords.length() > 0) {
                    badWords.append(",");
                }
                badWords.append(sentence.substring(start, end + 1));
                // Resume scanning right after the matched word (the loop increment adds one).
                i = end;
            }
        }

        SensitiveWordResult result = new SensitiveWordResult();
        result.setOriginalContent(pohResult.getOriginalString());
        result.setFilteredContent(maskedText.toString());
        result.setSensitiveWordStr(badWords.toString());
        result.setContains(StringUtils.isNotEmpty(badWords.toString()));
        return result;
    }

    /**
     * Removes every character matched by {@link #PUNCTUATION_PATTERN} from the input and records,
     * for each kept character, its index in the original string.
     *
     * <p>The input is processed one UTF-16 {@code char} at a time.
     *
     * @param originalString text to strip; must not be {@code null}
     * @return holder with the original text, the stripped text and the offset mapping
     */
    private static PunctuationOrHtmlFilteredResult filterPunctuation(String originalString) {
        StringBuilder filteredString = new StringBuilder();
        List<Integer> charOffsets = new ArrayList<>();
        for (int i = 0; i < originalString.length(); i++) {
            String c = String.valueOf(originalString.charAt(i));
            if (!isPunctuationChar(c)) {
                filteredString.append(c);
                charOffsets.add(i);
            }
        }
        PunctuationOrHtmlFilteredResult result = new PunctuationOrHtmlFilteredResult();
        result.setOriginalString(originalString);
        result.setFilteredString(filteredString);
        result.setCharOffsets(charOffsets);
        return result;
    }

    /**
     * Tells whether the given text contains a character that should be skipped during matching
     * (punctuation, separator, symbol, mark or "other" per {@link #PUNCTUATION_PATTERN}).
     *
     * @param c text to test; callers in this class pass a single-character string
     * @return {@code true} if any character of {@code c} matches the pattern
     */
    private static boolean isPunctuationChar(String c) {
        Matcher matcher = PUNCTUATION_PATTERN.matcher(c);
        return matcher.find();
    }

    /**
     * Holder tying an original string to its punctuation-stripped form.
     */
    @Data
    private static class PunctuationOrHtmlFilteredResult {
        private String originalString; // original, unmodified text
        private StringBuilder filteredString; // text with the ignored characters removed
        private List<Integer> charOffsets; // charOffsets[i] = index in originalString of filteredString[i]

    }

}
